#####################################################################################################################################
#####################################################################################################################################
# Replication File: "Reducing Bias in Citizens' Perception of Crime Rates: 
# Evidence from a Field Experiment on Burglary Prevalence" 
# Authors: Martin Vinæs Larsen & Asmus Leth Olsen
# The Journal of Politics                                                                                                           
#####################################################################################################################################
#####################################################################################################################################
   
# Clean the working environment: remove every non-hidden object from the
# environment the script is run in (ls() skips names starting with a dot).
# NOTE: this does not detach loaded packages or reset options().
rm(list = ls())

#load 
library(tidyverse)
library(dplyr)
library(stargazer)

# Insert the file path to "2_raw data.csv" here.
# The read.csv2() defaults are spelled out: a header row, semicolon-separated
# fields and a decimal comma.
raw_data <- read.csv2(
  file   = "2_raw data.csv",
  header = TRUE,
  sep    = ";",
  dec    = ","
)

# Check the imported data: first rows, then the column structure
head(x = raw_data)
str(object = raw_data)

################################################################## 
################################################################## 
######### RECODING FOR ANALYSIS IN THE ARTICLE
################################################################## 
################################################################## 

#see excel-file "data description" for description of all variables in the raw data file.

#recoding for data used in analysis 

# Build the analysis data set `df_full` from `raw_data`:
#   1. mutate(): derive treatment dummies, dates, outcome recodes and
#      background variables from the raw survey columns.
#   2. select(): keep only the derived variables, in a fixed order.
# Missing raw values propagate as NA through every recode below.
df_full <- raw_data %>%
  mutate(

    # Respondent id
    id = Id,

    # Treatment variable (`flyer`) coded into individual dummies.
    # `==` is used on purpose (not `%in%`) so a missing flyer code stays NA.
    stat    = ifelse(flyer == 1 | flyer == 2 | flyer == 5, 1, 0),
    resp    = ifelse(flyer == 3 | flyer == 4 | flyer == 5, 1, 0),
    pos     = ifelse(flyer == 2 | flyer == 4 | flyer == 6, 1, 0),
    neg     = ifelse(flyer == 1 | flyer == 3 | flyer == 6, 1, 0),
    placebo = ifelse(flyer == 7, 1, 0),

    # All leaflets (raw treatment code)
    leaflet = flyer,

    # Different control groups
    nonstat_t = ifelse(stat == 0 & placebo == 0, 1, 0), # non-statistics leaflets
    nonstat   = ifelse(stat == 0 | placebo == 1, 1, 0), # non-statistics leaflets including placebo

    # Dates (raw values are parsed as day-month-year)
    date         = as.Date(dato, format = "%d-%m-%Y"),          # date of invite to respond
    date_resp    = as.Date(W2_FinishTime, format = "%d-%m-%Y"), # date of actual response
    days_to_resp = difftime(date_resp, date, units = "days"),   # days between invite and actual response

    # Three groups by time of invite to respond in wave 2 (intended to have
    # equal N); invites before 2017-11-27 are labelled "0_pre".
    # Cutoffs are converted with as.Date() explicitly instead of relying on
    # the implicit character-to-Date coercion of the comparison operators.
    days = ifelse(
      date >= as.Date("2017-11-27") & date < as.Date("2017-12-03"),
      "1_days7_12", "0_pre"
    ),
    days = ifelse(
      date >= as.Date("2017-12-03") & date < as.Date("2017-12-09"),
      "2_days13_18", days
    ),
    days = ifelse(date >= as.Date("2017-12-09"), "3_days19_25", days),

    # All dependent variables, raw versions (wave 1 and wave 2)
    trend_w1    = q7_1_resp,
    trend_w2    = W2_q7_1_resp,
    level_w1    = q6_1_resp,
    level_w2    = W2_q6_1_resp,
    relative_w1 = q8_1_resp,
    relative_w2 = W2_q8_1_resp,

    # Dependent variables recoded as percent correct responses
    # (100 = correct, 0 = incorrect, NA = missing answer).
    # "exact" requires the answer 9; "pm2" accepts answers from 7 to 11.
    c_trend_w1       = as.numeric(q7_1_resp == 1) * 100,
    c_trend_w2       = as.numeric(W2_q7_1_resp == 1) * 100,
    c_level_w1_exact = as.numeric(q6_1_resp == 9) * 100,
    c_level_w2_exact = as.numeric(W2_q6_1_resp == 9) * 100,
    c_level_w1_pm2   = as.numeric(q6_1_resp >= 7 & q6_1_resp <= 11) * 100,
    c_level_w2_pm2   = as.numeric(W2_q6_1_resp >= 7 & W2_q6_1_resp <= 11) * 100,
    c_relative_w1    = as.numeric(q8_1_resp == farvekode) * 100,
    c_relative_w2    = as.numeric(W2_q8_1_resp == farvekode) * 100,

    # PLACEBO dependent variables (UNEMPLOYMENT) recoded as percent correct
    # responses. "exact" requires the answer 5; "pm2" accepts answers 3 to 7.
    p_trend_w1       = as.numeric(q7_2_resp == 1) * 100,
    p_trend_w2       = as.numeric(W2_q7_2_resp == 1) * 100,
    p_level_w1_exact = as.numeric(q6_2_resp == 5) * 100,
    p_level_w2_exact = as.numeric(W2_q6_2_resp == 5) * 100,
    p_level_w1_pm2   = as.numeric(q6_2_resp >= 3 & q6_2_resp <= 7) * 100,
    p_level_w2_pm2   = as.numeric(W2_q6_2_resp >= 3 & W2_q6_2_resp <= 7) * 100,
    p_relative_w1    = as.numeric(q8_2_resp == unempkode) * 100,
    p_relative_w2    = as.numeric(W2_q8_2_resp == unempkode) * 100,

    # Background variables
    female    = q18 - 1,    # presumably q18 is coded 1/2 -- TODO confirm against the data description
    age       = 2017 - q19, # presumably q19 is the year of birth -- TODO confirm
    burg_fear = q1_1_resp,
    pol_intr  = q15,
    education = ifelse(q20 == 8 | q20 == 3, 7, q20), # categories 3 and 8 are merged into 7
    income    = ifelse(q22 == 12, 11, q22),          # category 12 is merged into 11
    income    = ifelse(q22 == 9 | q22 == 10, 8, income), # categories 9 and 10 are merged into 8
    region      = Region,
    local_crime = farvekode,
    local_unemp = unempkode,

    # 1 if the respondent has a wave 2 start time, 0 otherwise.
    # NOTE(review): read.csv2() only turns blank fields into NA for
    # non-character columns; if W2_StartTime is read as text with empty
    # strings for non-respondents, this flag would be 1 for everyone -- confirm.
    w2_resp     = ifelse(is.na(W2_StartTime), 0, 1),
    got_leaflet = ifelse(W2_q25 == 1, 1, 0)
  ) %>%

  # Select the variables used in the analysis
  select(
    id, stat, resp, pos, neg, placebo, nonstat_t, nonstat, leaflet,
    date, date_resp, days_to_resp, days,
    trend_w1, trend_w2, level_w1, level_w2, relative_w1, relative_w2,
    c_trend_w1, c_trend_w2, c_level_w1_exact, c_level_w2_exact,
    c_level_w1_pm2, c_level_w2_pm2, c_relative_w1, c_relative_w2,
    p_trend_w1, p_trend_w2, p_level_w1_exact, p_level_w2_exact,
    p_level_w1_pm2, p_level_w2_pm2, p_relative_w1, p_relative_w2,
    female, age, burg_fear, pol_intr, education, income, region,
    local_crime, local_unemp, w2_resp, got_leaflet
  )
  
# Main analysis file: only subjects responding in wave 2
df <- filter(df_full, w2_resp == 1)

# Structure of the full file, which includes respondents that only
# participated in wave 1
str(df_full)

# Structure of the main analysis file
str(df)

# Make sure the mechanics work as intended: cross-tabulate the statistics
# dummy against the non-statistics dummy, and the invite-date groups against
# the invite dates
table(df[["stat"]], df[["nonstat"]])
table(df[["days"]], df[["date"]])

##############################################################
##############################################################
#THE END
##############################################################
##############################################################
